#Proteomics analysis for QE data at individual protein level setwd('/Users/emmatimminsschiffman/Documents/Dissertation/proteomics/DB post-genome/QE analysis') prot.dat<-read.csv('protein expression QE for NMDS.csv', header=T, row.names=1) treatment<-read.csv('treatment groupings.csv', header=T, row.names=1) #screen data for insufficient variables #drop proteins expressed in less than half the oysters prot.dat1<-drop.var(prot.dat, min.fo=8) str(prot.dat1) #2086 proteins left in analysis #drop proteins in over 95% of oysters prot.dat2<-drop.var(prot.dat, max.po=95) str(prot.dat2) #1123 proteins left #drop proteins with only a little variation prot.dat3<-drop.var(prot.dat, min.cv=5) str(prot.dat3) #2424 proteins left #dataset without rare and without super abundant proteins prot.dat3<-drop.var(prot.dat1, max.po=95) str(prot.dat3) #785 proteins left #log(x+1) transformation protdat.tra<-(prot.dat+1) protdat.tra<-data.trans(protdat.tra, method='log', plot=F) protdat1.tra<-(prot.dat1+1) protdat1.tra<-data.trans(protdat1.tra, method='log', plot=F) protdat2.tra<-(prot.dat2+1) protdat2.tra<-data.trans(protdat2.tra, method='log', plot=F) protdat3.tra<-(prot.dat3+1) protdat3.tra<-data.trans(protdat3.tra, method='log', plot=F) #NMDS #all proteins protdat.nmds<-metaMDS(protdat.tra, distance='bray', k=2, trymax=100, autotransform=F) #low stress #calculate loadings vec.prot<-envfit(protdat.nmds$points, protdat.tra, perm=100) ordiplot(protdat.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.prot, p.max=0.01, col='blue', cex=0.5) ordihull(protdat.nmds, treatment[,3], label=T, col='magenta') #anosim protdat.row<-data.stand(prot.dat, method='total', margin='row', plot=F) protdat.d<-vegdist(protdat.row, 'bray') #anosim for pCO2 prot.anosim1<-anosim(protdat.d, grouping=treatment[,1]) summary(prot.anosim1) #R=-0.03125, p=0.606 #anosim for MS prot.anosim2<-anosim(protdat.d, grouping=treatment[,2]) summary(prot.anosim2) #R=-0.01172, p=0.519 #anosim for all treatments prot.anosim3<-anosim(protdat.d, grouping=treatment[,3]) summary(prot.anosim3) #R=-0.02431, p=.586 #drop low abundance protdat1.nmds<-metaMDS(protdat1.tra, distance='bray', k=2, trymax=100, autotransform=F) vec.prot1<-envfit(protdat1.nmds$points, protdat1.tra, perm=100) ordiplot(protdat1.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.prot1, p.max=0.01, col='blue', cex=0.5) ordihull(protdat1.nmds, treatment[,3], label=T, col='magenta') #anosim protdat1.row<-data.stand(prot.dat1, method='total', margin='row', plot=F) protdat1.d<-vegdist(protdat1.row, 'bray') #anosim for pCO2 prot1.anosim1<-anosim(protdat1.d, grouping=treatment[,1]) summary(prot1.anosim1) #R=-0.03181, p=0.625 #anosim for MS prot1.anosim2<-anosim(protdat1.d, grouping=treatment[,2]) summary(prot1.anosim2) #R=-0.01004, p=0.469 #anosim for all treatments prot1.anosim3<-anosim(protdat1.d, grouping=treatment[,3]) summary(prot1.anosim3) #R=-0.0217, p=0.545 #drop high abundance protdat2.nmds<-metaMDS(protdat2.tra, distance='bray', k=2, trymax=100, autotransform=F) vec.prot2<-envfit(protdat2.nmds$points, protdat2.tra, perm=100) ordiplot(protdat2.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.prot2, p.max=0.01, col='blue', cex=0.5) ordihull(protdat2.nmds, treatment[,3], label=T, col='magenta') #anosim protdat2.row<-data.stand(prot.dat2, method='total', margin='row', plot=F) protdat2.d<-vegdist(protdat2.row, 'bray') #anosim for pCO2 prot2.anosim1<-anosim(protdat2.d, grouping=treatment[,1]) summary(prot2.anosim1) #R=0.0106, p=0.389 #anosim for MS prot2.anosim2<-anosim(protdat2.d, grouping=treatment[,2]) summary(prot2.anosim2) #R=-0.04408, p=0.722 #anosim for all treatments prot2.anosim3<-anosim(protdat2.d, grouping=treatment[,3]) summary(prot2.anosim3) #R=0.05208, p=0.274 #drop low and high abundance protdat3.nmds<-metaMDS(protdat3.tra, distance='bray', k=2, trymax=100, autotransform=F) vec.prot3<-envfit(protdat3.nmds$points, protdat3.tra, perm=100) ordiplot(protdat3.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.prot3, p.max=0.01, col='blue', cex=0.5) ordihull(protdat3.nmds, treatment[,3], label=T, col='magenta') #anosim protdat3.row<-data.stand(prot.dat3, method='total', margin='row', plot=F) protdat3.d<-vegdist(protdat3.row, 'bray') #anosim for pCO2 prot3.anosim1<-anosim(protdat3.d, grouping=treatment[,1]) summary(prot3.anosim1) #R=0.02288, p=0.323 #anosim for MS prot3.anosim2<-anosim(protdat3.d, grouping=treatment[,2]) summary(prot3.anosim2) #R=-0.0558, p=0.783 #anosim for all treatments prot3.anosim3<-anosim(protdat3.d, grouping=treatment[,3]) summary(prot3.anosim3) #R=0.04861, p=0.29 #Analysis at GO level GO.dat<-read.csv('biological processes GO for NMDS.csv', header=T, row.names=1) GOdat.tra<-(GO.dat+1) GO.tra<-data.trans(GOdat.tra, method='log', plot=F) #NMDS GOdat.nmds<-metaMDS(GO.tra, distance='bray', k=2, trymax=100, autotransform=F) #low stress #calculate loadings vec.GO<-envfit(GOdat.nmds$points, GO.tra, perm=100) ordiplot(GOdat.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.GO, p.max=0.01, col='blue', cex=0.5) ordihull(GOdat.nmds, treatment[,3], label=T, col='magenta') #anosim GOdat.row<-data.stand(GO.dat, method='total', margin='row', plot=F) GOdat.d<-vegdist(GOdat.row, 'bray') #anosim for pCO2 GO.anosim1<-anosim(GOdat.d, grouping=treatment[,1]) summary(GO.anosim1) #R=0.04408, p=0.273 #anosim for MS GO.anosim2<-anosim(GOdat.d, grouping=treatment[,2]) summary(GO.anosim2) #R=0.06138, p=0.202 #anosim for all treatments GO.anosim3<-anosim(GOdat.d, grouping=treatment[,3]) summary(GO.anosim3) #R=0.1319, p=0.093 #Analysis at GO Slim level slim.dat<-read.csv('biological processes GO Slim for NMDS.csv', header=T, row.names=1) slimdat.tra<-data.trans(slim.dat, method='log', plot=F) #NMDS slimdat.nmds<-metaMDS(slimdat.tra, distance='bray', k=2, trymax=100, autotransform=F) vec.slim<-envfit(slimdat.nmds$points, slimdat.tra, perm=100) ordiplot(slimdat.nmds, choices=c(1,2), type='text', display='sites', xlab='Axis 1', ylab='Axis 2') plot(vec.slim, p.max=0.01, col='blue', cex=0.5) ordihull(slimdat.nmds, treatment[,3], labe=T, col='magenta') #anosim slimdat.row<-data.stand(slim.dat, method='total', margin='row', plot=F) slimdat.d<-vegdist(slimdat.row, 'bray') #anosim for pCO2 slim.anosim1<-anosim(slimdat.d, grouping=treatment[,1]) summary(slim.anosim1) #R=-0.04185, p=0.642 #anosim for MS slim.anosim2<-anosim(slimdat.d, grouping=treatment[,2]) summary(slim.anosim2) #R=0.07645, p=0.168 #anosim for all treatments slim.anosim3<-anosim(slimdat.d, grouping=treatment[,3]) summary(slim.anosim3) #R=0.1111, p=0.135